Zooplankton Compositions

Code
#### Loading Packages 

library(shiny)
library(tidyverse) ; library(dplyr) ; library(ggplot2)
library(gridExtra) ; library(tidyr) ; library(robCompositions) ; library(viridis)  ; library(plotly)

#### Loading functions 
# Helper functions are sourced straight from the project repository.
# NOTE(review): these URLs track the `main` branch, so the code that runs can
# change between renders -- consider pinning to a commit.
# ggLatDepth.R used to be sourced twice in a row; the second, redundant fetch
# has been dropped.
source("https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/ALR_CLR_distMatrices.R")
source("https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/aggregating_composition_new.R")
source("https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/ggLatDepth.R")
source('https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/ggLatDepth_temp.R')
source('https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/plot_mds2d.R')
source('https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/plot_mds3d.R')
source('https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/functions.R')
source('https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/clusterize_medoids.R')
source('https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/clusterize_hclustW2.R')
#### Loading data

# Metadata table (metadat_abiotic.csv), converted to a tibble.
metadat <- as_tibble(
  data.table::fread("https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/metadat_abiotic.csv")
)

# Taxonomy table (treated_taxonomy_dat.csv), converted to a tibble.
dat_tax <- as_tibble(
  data.table::fread("https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/treated_taxonomy_dat.csv")
)

# Give every row of dat_tax a zero-padded identifier: ID0001, ID0002, ...
# sprintf("%04d") pads to at least four digits, which reproduces the previous
# nested ifelse()/paste() ladder for every row count (10000 -> "ID10000").
# seq_len(n()) also stays valid for a zero-row table, where 1:nrow() would
# yield c(1, 0) and make mutate() fail.
dat_tax <- dat_tax %>%
  mutate(ASV = sprintf("ID%04d", seq_len(n())))
Code
###### Creating all clusters ---------------------
# Aggregation levels. The order given here is the level order used when
# `agglevel` is turned into a factor for plotting.
vec_aggregation <- c(
  "PG", "SG1", "Supergroup", "Division", "Class",
  "Order", "Family", "Genus", "Species", "ASV"
)


# datadir = "/Users/rafaelcatoia/MyDrive/20_UCSC/Capstone/00_Data/Pacific Ocean/"
# 
# listW2 <- clusterize_hclustW2(data_taxonomy = dat_tax,
#                               vec_aggreg = vec_aggregation,
#                               max_k = 50,max_at_least = 3)
# 
# listMedois <- clusterize_kmedoids(data_taxonomy = dat_tax,
#                                   vec_aggreg = vec_aggregation,
#                                   max_k = 50,max_at_least = 3)
# 
# saveRDS(listW2,file = "/Users/rafaelcatoia/Desktop/repos/Capstone/listW2")
# saveRDS(listMedois,file = "/Users/rafaelcatoia/Desktop/repos/Capstone/listMedois")
# 
#listW2<-readRDS("/Users/rafaelcatoia/Desktop/repos/Capstone/listW2")
#listMedois<-readRDS("/Users/rafaelcatoia/Desktop/repos/Capstone/listMedois")

In progress

The first and most straightforward method is to count the number of distinct ASVs in each sample.

Code
# Occupancy summaries built from the columns of dat_tax whose names start
# with "P16" (one column per sample, one row per ASV).
#
# ASV_Over_Samples: for each ASV, the number of samples with a non-zero entry,
# sorted from most to least widespread; ASVNumber is the rank after sorting.
# The count is taken directly as "number of non-zero entries" instead of
# `195 - number of zeros`, so it no longer depends on a hard-coded sample
# count and stays correct if the number of P16 columns changes.
ASV_Over_Samples <- data.frame(
  ASV_Id = dat_tax$ASV,
  KeyAsv = seq_along(dat_tax$ASV),
  PresentInNSamples = rowSums(
    as.matrix(select(dat_tax, starts_with("P16"))) != 0
  )
) %>%
  arrange(-PresentInNSamples) %>%
  mutate(ASVNumber = seq_len(n()))

# Samples_Over_ASV: for each sample, the number of ASVs with a non-zero entry,
# sorted from richest to poorest; SampleNumber is the rank after sorting.
Samples_Over_ASV <- data.frame(
  Sample = colnames(select(dat_tax, starts_with("P16"))),
  NumberOfASVs = colSums(
    as.matrix(select(dat_tax, starts_with("P16"))) != 0
  )
) %>%
  arrange(-NumberOfASVs) %>%
  mutate(SampleNumber = seq_len(n()))


# Number of samples each ASV is present in, against the ASV's rank.
ggplot(ASV_Over_Samples, aes(x = ASVNumber, y = PresentInNSamples)) +
  geom_point() +
  theme_minimal()

# Number of ASVs present in each sample, against the sample's rank.
ggplot(Samples_Over_ASV, aes(x = SampleNumber, y = NumberOfASVs)) +
  geom_point() +
  theme_minimal()

Code
# ## Creating all Clusters
# listW2 <- clusterize_MixDist_hclustW2(
#   data_taxonomy = dat_tax,
#   vec_aggreg = vec_aggregation,
#   data_metadat = metadat,
#   max_k = 50,max_at_least = 3)
# 
# listMedois <- clusterize_MixDist_kmedoids(
#   data_taxonomy = dat_tax,
#   vec_aggreg = vec_aggregation,
#   data_metadat = metadat,
#   max_k = 50,max_at_least = 3)
# 
# 
# ##### Calculating geocoherence ------------------
# 
# ########### i is for removing at least
# ########### j is for aggregation level
# ########### k is for clustersize
# ########### i=2 ; j = 9, k = 10
# 
# # dat_tax_aux <- aggregating_compositions(
# #   dFrame = dat_tax,
# #   fillZeros = 'Nothing',
# #   aggregating_level = vec_aggregation[1],
# #   PresentAtLeast = 1,
# #   metadata = metadat
# # ) %>% mutate(
# #   OBS=1:n()) %>% 
# #   select(OBS,Latitude,Depth,Pressure_decibars,
# #          Salinity_psu,Temperature_degrees_Celsius) %>% 
# #   left_join(
# #     metadat %>% select(Samples,Latitude,Depth) %>% 
# #       arrange(Samples,Depth) %>% 
# #       group_by(Latitude) %>% mutate(DepthRank=1:n()))
# # 
# # 
# # outputSS_MixDist <- list()
# # kmax = length(listW2$AtLeast1$PG)
# # for (i in 1:3){
# #   
# #   outputAgg <- list()
# #   for(j in 1:length(vec_aggregation)){
# #     
# #     outputDistances <- list()
# #     for(k in 1:kmax){
# #       
# #       dat_tax_aux = 
# #         dat_tax_aux %>% mutate(
# #           Ward_Clust = factor(listW2[[i]][[j]][[k]]),
# #           Medoid_Clust = factor(listMedois[[i]][[j]][[k]]))
# #       
# #       outputDistances[[k]] <- list(
# #         Ward=dat_tax_aux %>% geoCoherense(ClustVar = 'Ward_Clust',DephtVar = 'DepthRank'),
# #         Medoid=dat_tax_aux %>% geoCoherense(ClustVar = 'Medoid_Clust',DephtVar = 'DepthRank'))
# #     }
# #     names(outputDistances) <- ifelse(1:kmax < 10,
# #                                      paste('Cluster0',1:kmax,sep=''),
# #                                      paste('Cluster',1:kmax,sep=''))
# #     outputAgg[[j]]<-outputDistances
# #     rm(outputDistances)
# #   }
# #   
# #   names(outputAgg) <- vec_aggregation
# #   outputSS_MixDist[[i]] <- outputAgg
# #   rm(outputAgg)
# # }
# # 
# # names(outputSS_MixDist) <- paste('AtLeastIn',1:3,sep='')
# # 
# # saveRDS(object = outputSS_MixDist,file = 'outputSS_MixDist')
# outputSS_MixDist=readRDS('outputSS_MixDist')
# 
# df_outputSS_MixDist <- outputSS_MixDist %>% plyr::ldply(function(atleast){
#   atleast %>% plyr::ldply(function(agglevel){
#     agglevel %>% plyr::ldply(function(cluster){
#       cluster %>% plyr::ldply(function(method){
#         method %>% plyr::ldply(function(dimension){
#           return(data.frame(TotalSum = sum(dimension$SumDist)))
#         }, .id = "dimension")
#       }, .id = "method")
#     }, .id = "clusters")
#   }, .id = "agglevel")
# }, .id = "atleast")
#  
# data.table::fwrite(df_outputSS_MixDist,'/Users/rafaelcatoia/Desktop/repos/Capstone/df_outputSS_MixDist.csv')
# Read the pre-computed summary table (df_outputSS_MixDist.csv) and keep a
# copy tagged with AlphaGeo = 0.2.
df_outputSS_MixDist <- data.table::fread(
  input = "https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/df_outputSS_MixDist.csv"
)
df_outputSS_MixDist_0.2 <- mutate(df_outputSS_MixDist, AlphaGeo = 0.2)
Code
## Creating all Clusters
# listW2 <- clusterize_MixDist_hclustW2(
#   data_taxonomy = dat_tax,
#   vec_aggreg = vec_aggregation,
#   data_metadat = metadat,
#   propGeo = 0.1,
#   max_k = 50,max_at_least = 3)
# 
# listMedois <- clusterize_MixDist_kmedoids(
#   data_taxonomy = dat_tax,
#   vec_aggreg = vec_aggregation,
#   data_metadat = metadat,
#   propGeo = 0.1,
#   max_k = 50,max_at_least = 3)
#  
#  
# #### Calculating geocoherence ------------------
# 
# ########## i is for removing at least
# ########## j is for aggregation level
# ########## k is for clustersize
# ########## i=2 ; j = 9, k = 10
# 
#  dat_tax_aux <- aggregating_compositions(
#    dFrame = dat_tax,
#    fillZeros = 'Nothing',
#    aggregating_level = vec_aggregation[1],
#    PresentAtLeast = 1,
#    metadata = metadat
#  ) %>% mutate(
#    OBS=1:n()) %>% 
#    select(OBS,Latitude,Depth,Pressure_decibars,
#           Salinity_psu,Temperature_degrees_Celsius) %>% 
#    left_join(
#      metadat %>% select(Samples,Latitude,Depth) %>% 
#        arrange(Samples,Depth) %>% 
#        group_by(Latitude) %>% mutate(DepthRank=1:n()))
#  
#  
#  outputSS_MixDist <- list()
#  kmax = length(listW2$AtLeast1$PG)
#  for (i in 1:3){
#    
#    outputAgg <- list()
#    for(j in 1:length(vec_aggregation)){
#      
#      outputDistances <- list()
#      for(k in 1:kmax){
#        
#        dat_tax_aux = 
#          dat_tax_aux %>% mutate(
#            Ward_Clust = factor(listW2[[i]][[j]][[k]]),
#            Medoid_Clust = factor(listMedois[[i]][[j]][[k]]))
#        
#        outputDistances[[k]] <- list(
#          Ward=dat_tax_aux %>% geoCoherense(ClustVar = 'Ward_Clust',DephtVar = 'DepthRank'),
#          Medoid=dat_tax_aux %>% geoCoherense(ClustVar = 'Medoid_Clust',DephtVar = 'DepthRank'))
#      }
#      names(outputDistances) <- ifelse(1:kmax < 10,
#                                       paste('Cluster0',1:kmax,sep=''),
#                                       paste('Cluster',1:kmax,sep=''))
#      outputAgg[[j]]<-outputDistances
#      rm(outputDistances)
#    }
#    
#    names(outputAgg) <- vec_aggregation
#    outputSS_MixDist[[i]] <- outputAgg
#    rm(outputAgg)
#  }
#  
#  names(outputSS_MixDist) <- paste('AtLeastIn',1:3,sep='')
#  
#  saveRDS(object = outputSS_MixDist,file = 'outputSS_MixDist')
#  outputSS_MixDist=readRDS('outputSS_MixDist')
#  
#  df_outputSS_MixDist <- outputSS_MixDist %>% plyr::ldply(function(atleast){
#    atleast %>% plyr::ldply(function(agglevel){
#      agglevel %>% plyr::ldply(function(cluster){
#        cluster %>% plyr::ldply(function(method){
#          method %>% plyr::ldply(function(dimension){
#            return(data.frame(TotalSum = sum(dimension$SumDist)))
#          }, .id = "dimension")
#        }, .id = "method")
#      }, .id = "clusters")
#    }, .id = "agglevel")
#  }, .id = "atleast")
#   
# df_outputSS_MixDist_0.1 = df_outputSS_MixDist %>% mutate(AlphaGeo = 0.1)
#  
# data.table::fwrite(df_outputSS_MixDist_0.1,'/Users/rafaelcatoia/Desktop/repos/Capstone/df_outputSS_MixDist_0.1.csv')
# Read the pre-computed summary table df_outputSS_MixDist_0.1.csv.
df_outputSS_MixDist_0.1 <- data.table::fread(
  input = "https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/df_outputSS_MixDist_0.1.csv"
)
Code
## Creating all Clusters
# listW2 <- clusterize_MixDist_hclustW2(
#   data_taxonomy = dat_tax,
#   vec_aggreg = vec_aggregation,
#   data_metadat = metadat,
#   propGeo = 0.35,
#   max_k = 50,max_at_least = 3)
# 
# listMedois <- clusterize_MixDist_kmedoids(
#   data_taxonomy = dat_tax,
#   vec_aggreg = vec_aggregation,
#   data_metadat = metadat,
#   propGeo = 0.35,
#   max_k = 50,max_at_least = 3)
#  
#  
# #### Calculating geocoherence ------------------
# 
# ########## i is for removing at least
# ########## j is for aggregation level
# ########## k is for clustersize
# ########## i=2 ; j = 9, k = 10
# 
#  dat_tax_aux <- aggregating_compositions(
#    dFrame = dat_tax,
#    fillZeros = 'Nothing',
#    aggregating_level = vec_aggregation[1],
#    PresentAtLeast = 1,
#    metadata = metadat
#  ) %>% mutate(
#    OBS=1:n()) %>% 
#    select(OBS,Latitude,Depth,Pressure_decibars,
#           Salinity_psu,Temperature_degrees_Celsius) %>% 
#    left_join(
#      metadat %>% select(Samples,Latitude,Depth) %>% 
#        arrange(Samples,Depth) %>% 
#        group_by(Latitude) %>% mutate(DepthRank=1:n()))
#  
#  
#  outputSS_MixDist <- list()
#  kmax = length(listW2$AtLeast1$PG)
#  for (i in 1:3){
#    
#    outputAgg <- list()
#    for(j in 1:length(vec_aggregation)){
#      
#      outputDistances <- list()
#      for(k in 1:kmax){
#        
#        dat_tax_aux = 
#          dat_tax_aux %>% mutate(
#            Ward_Clust = factor(listW2[[i]][[j]][[k]]),
#            Medoid_Clust = factor(listMedois[[i]][[j]][[k]]))
#        
#        outputDistances[[k]] <- list(
#          Ward=dat_tax_aux %>% geoCoherense(ClustVar = 'Ward_Clust',DephtVar = 'DepthRank'),
#          Medoid=dat_tax_aux %>% geoCoherense(ClustVar = 'Medoid_Clust',DephtVar = 'DepthRank'))
#      }
#      names(outputDistances) <- ifelse(1:kmax < 10,
#                                       paste('Cluster0',1:kmax,sep=''),
#                                       paste('Cluster',1:kmax,sep=''))
#      outputAgg[[j]]<-outputDistances
#      rm(outputDistances)
#    }
#    
#    names(outputAgg) <- vec_aggregation
#    outputSS_MixDist[[i]] <- outputAgg
#    rm(outputAgg)
#  }
#  
#  names(outputSS_MixDist) <- paste('AtLeastIn',1:3,sep='')
#  
#  saveRDS(object = outputSS_MixDist,file = 'outputSS_MixDist')
#  outputSS_MixDist=readRDS('outputSS_MixDist')
#  
#  df_outputSS_MixDist <- outputSS_MixDist %>% plyr::ldply(function(atleast){
#    atleast %>% plyr::ldply(function(agglevel){
#      agglevel %>% plyr::ldply(function(cluster){
#        cluster %>% plyr::ldply(function(method){
#          method %>% plyr::ldply(function(dimension){
#            return(data.frame(TotalSum = sum(dimension$SumDist)))
#          }, .id = "dimension")
#        }, .id = "method")
#      }, .id = "clusters")
#    }, .id = "agglevel")
#  }, .id = "atleast")
#   
# df_outputSS_MixDist_0.35 = df_outputSS_MixDist %>% mutate(AlphaGeo = 0.35)
#  
# data.table::fwrite(df_outputSS_MixDist_0.35,'/Users/rafaelcatoia/Desktop/repos/Capstone/df_outputSS_MixDist_0.35.csv')
# Read the pre-computed summary table df_outputSS_MixDist_0.35.csv.
df_outputSS_MixDist_0.35 <- data.table::fread(
  input = "https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/df_outputSS_MixDist_0.35.csv"
)

The idea here is to verify whether the clusters built from the compositions are geospatially coherent.

As a first attempt, given the clusters, we:

  • sum the distances between samples within the same cluster
    • with respect to Latitude and Depth, using the Euclidean distance (no transformations applied so far)
    • with respect to the abiotic features (Temperature, Salinity and Pressure)
Code
# i is for removing at least
# j is for aggregation level
# k is for clustersize
#i=2 ; j = 9, k = 10

#dat_tax_aux <- aggregating_compositions(
#  dFrame = dat_tax,
#  fillZeros = 'Nothing',
#  aggregating_level = vec_aggregation[1],
#  PresentAtLeast = 1,
#  metadata = metadat
#) %>% mutate(
#  OBS=1:n()) %>% 
#  select(OBS,Latitude,Depth,Pressure_decibars,
#         Salinity_psu,Temperature_degrees_Celsius)
#
#outputSS <- list()
#kmax = length(listW2$AtLeast1$PG)
#for (i in 1:3){
#  
#  outputAgg <- list()
#  for(j in 1:length(vec_aggregation)){
#    
#    outputDistances <- list()
#    for(k in 1:kmax){
#      
#      dat_tax_aux = 
#        dat_tax_aux %>% mutate(
#          Ward_Clust = factor(listW2[[i]][[j]][[k]]),
#          Medoid_Clust = factor(listMedois[[i]][[j]][[k]]))
#      
#      outputDistances[[k]] <- list(
#        Ward=dat_tax_aux %>% geoCoherense(ClustVar = 'Ward_Clust',DephtVar = 'DepthRank'),
#        Medoid=dat_tax_aux %>% geoCoherense(ClustVar = 'Medoid_Clust',DephtVar = 'DepthRank'))
#    }
#    names(outputDistances) <- ifelse(1:kmax < 10,
#                                     paste('Cluster0',1:kmax,sep=''),
#                                     paste('Cluster',1:kmax,sep=''))
#    outputAgg[[j]]<-outputDistances
#    rm(outputDistances)
#  }
#  
#  names(outputAgg) <- vec_aggregation
#  outputSS[[i]] <- outputAgg
#  rm(outputAgg)
#}
#
#names(outputSS) <- paste('AtLeastIn',1:3,sep='')
#
#saveRDS(object = outputSS,file = 'outputSS')
#outputSS=readRDS('outputSS')
#
#df_outputSS <- outputSS %>% plyr::ldply(function(atleast){
#  atleast %>% plyr::ldply(function(agglevel){
#    agglevel %>% plyr::ldply(function(cluster){
#      cluster %>% plyr::ldply(function(method){
#        method %>% plyr::ldply(function(dimension){
#          return(data.frame(TotalSum = sum(dimension$SumDist)))
#        }, .id = "dimension")
#      }, .id = "method")
#    }, .id = "clusters")
#  }, .id = "agglevel")
#}, .id = "atleast")
# 
#data.table::fwrite(df_outputSS,'/Users/rafaelcatoia/Desktop/repos/Capstone/df_outputSS.csv')
# Read the pre-computed summary table df_outputSS.csv.
df_outputSS <- data.table::fread(
  input = "https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/df_outputSS.csv"
)


#######--- Doing the same thing now, but with the depth rank instead of the true depth

# dat_tax_aux <- aggregating_compositions(
#   dFrame = dat_tax,
#   fillZeros = 'Nothing',
#   aggregating_level = vec_aggregation[1],
#   PresentAtLeast = 1,
#   metadata = metadat
# ) %>% mutate(
#   OBS=1:n()) %>% 
#   select(OBS,Latitude,Depth,Pressure_decibars,
#          Salinity_psu,Temperature_degrees_Celsius) %>% 
#   left_join(
#     metadat %>% select(Samples,Latitude,Depth) %>% 
#   arrange(Samples,Depth) %>% 
#   group_by(Latitude) %>% mutate(DepthRank=1:n()))
# 
# outputSS_Rank <- list()
# kmax = length(listW2$AtLeast1$PG)
# for (i in 1:3){
#   
#   outputAgg <- list()
#   for(j in 1:length(vec_aggregation)){
#     
#     outputDistances <- list()
#     for(k in 1:kmax){
#       
#       dat_tax_aux = 
#         dat_tax_aux %>% mutate(
#           Ward_Clust = factor(listW2[[i]][[j]][[k]]),
#           Medoid_Clust = factor(listMedois[[i]][[j]][[k]]))
#       
#       outputDistances[[k]] <- list(
#         Ward=dat_tax_aux %>% geoCoherense(ClustVar = 'Ward_Clust',DephtVar = 'DepthRank'),
#         Medoid=dat_tax_aux %>% geoCoherense(ClustVar = 'Medoid_Clust',DephtVar = 'DepthRank'))
#     }
#     names(outputDistances) <- ifelse(1:kmax < 10,
#                                      paste('Cluster0',1:kmax,sep=''),
#                                      paste('Cluster',1:kmax,sep=''))
#     outputAgg[[j]]<-outputDistances
#     rm(outputDistances)
#   }
#   
#   names(outputAgg) <- vec_aggregation
#   outputSS_Rank[[i]] <- outputAgg
#   rm(outputAgg)
# }
# 
# names(outputSS_Rank) <- paste('AtLeastIn',1:3,sep='')
# 
# saveRDS(object = outputSS_Rank,file = 'outputSS_Rank')
# outputSS_Rank=readRDS('outputSS_Rank')
# 
# df_outputSS_Rank <- outputSS_Rank %>% plyr::ldply(function(atleast){
#   atleast %>% plyr::ldply(function(agglevel){
#     agglevel %>% plyr::ldply(function(cluster){
#       cluster %>% plyr::ldply(function(method){
#         method %>% plyr::ldply(function(dimension){
#           return(data.frame(TotalSum = sum(dimension$SumDist)))
#         }, .id = "dimension")
#       }, .id = "method")
#     }, .id = "clusters")
#   }, .id = "agglevel")
# }, .id = "atleast")

#data.table::fwrite(df_outputSS_Rank,'/Users/rafaelcatoia/Desktop/repos/Capstone/df_outputSS_Rank.csv')
# Read the pre-computed summary table df_outputSS_Rank.csv.
df_outputSS_Rank <- data.table::fread(
  input = "https://raw.githubusercontent.com/rafaelcatoia/zoop_16N/main/df_outputSS_Rank.csv"
)
Code
# log(TotalSum) against the number of clusters for the 'GeoSpatial' rows of
# df_outputSS: one panel per aggregation level, colour = method,
# line type = atleast.
ggplot(
  data = df_outputSS %>%
    mutate(
      NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
      Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
      agglevel = factor(agglevel, levels = vec_aggregation)
    ) %>%
    filter(Dimension == 'GeoSpatial'),
  mapping = aes(
    x = NumberOfClusters,
    y = log(TotalSum),
    color = method,
    linetype = atleast
  )
) +
  geom_line(linewidth = 0.75, alpha = 0.75) +
  facet_wrap(~agglevel, nrow = 2) +
  theme_minimal() +
  theme(legend.position = 'bottom')

Code
# log(TotalSum) against the number of clusters for the 'GeoSpatial' rows of
# df_outputSS: panels are atleast (rows) x method (columns), colour is the
# aggregation level on a discrete viridis scale.
df_outputSS %>%
  mutate(
    NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
    Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
    agglevel = factor(agglevel, levels = vec_aggregation)
  ) %>%
  filter(Dimension == 'GeoSpatial') %>%
  ggplot(aes(x = NumberOfClusters, y = log(TotalSum), color = agglevel)) +
  geom_line(linewidth = 1, alpha = 0.75) +
  facet_grid(atleast ~ method) +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  viridis::scale_color_viridis(discrete = TRUE)

Code
# Stack the rank-based table and the three MixDist tables, labelling each with
# its AlphaGeo value as a string, then plot log(TotalSum) against the number
# of clusters for the 'GeoSpatial' rows (PG and SG1 excluded):
# panels are atleast (rows) x aggregation level (columns), colour = AlphaGeo,
# line type = method.
ggplot(
  data = bind_rows(
    mutate(df_outputSS_Rank, Dist = 'Rank', AlphaGeo = '0'),
    mutate(df_outputSS_MixDist_0.1, Dist = 'Rank', AlphaGeo = '0.1'),
    mutate(df_outputSS_MixDist_0.2, Dist = 'Rank', AlphaGeo = '0.2'),
    mutate(df_outputSS_MixDist_0.35, Dist = 'Rank', AlphaGeo = '0.35')
  ) %>%
    mutate(
      NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
      Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
      agglevel = factor(agglevel, levels = vec_aggregation)
    ) %>%
    filter(Dimension == 'GeoSpatial', agglevel != 'PG', agglevel != 'SG1'),
  mapping = aes(
    x = NumberOfClusters,
    y = log(TotalSum),
    color = AlphaGeo,
    linetype = method
  )
) +
  geom_line(linewidth = 0.75, alpha = 0.75) +
  facet_grid(atleast ~ agglevel) +
  theme_minimal() +
  theme(legend.position = 'bottom')

Code
# Stack the rank-based table and the three MixDist tables, labelling each with
# its AlphaGeo value as a string, then plot log(TotalSum) against the number
# of clusters for the 'GeoSpatial' rows (PG and SG1 excluded):
# panels are atleast (rows) x AlphaGeo (columns), colour = aggregation level,
# line type = method.
bind_rows(
  df_outputSS_Rank %>% mutate(Dist = 'Rank', AlphaGeo = '0'),
  df_outputSS_MixDist_0.1 %>% mutate(Dist = 'Rank', AlphaGeo = '0.1'),
  df_outputSS_MixDist_0.2 %>% mutate(Dist = 'Rank', AlphaGeo = '0.2'),
  df_outputSS_MixDist_0.35 %>% mutate(Dist = 'Rank', AlphaGeo = '0.35')
) %>%
  mutate(
    NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
    Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
    agglevel = factor(agglevel, levels = vec_aggregation)
  ) %>%
  filter(Dimension == 'GeoSpatial', agglevel != 'PG', agglevel != 'SG1') %>%
  ggplot(aes(x = NumberOfClusters, y = log(TotalSum), color = agglevel, linetype = method)) +
  geom_line(linewidth = 1, alpha = 0.75) +
  facet_grid(atleast ~ AlphaGeo) +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  viridis::scale_color_viridis(discrete = TRUE)

Code
# log(TotalSum) against the number of clusters for the 'Abiotics' rows of
# df_outputSS: one panel per aggregation level, colour = method,
# line type = atleast.
ggplot(
  data = df_outputSS %>%
    mutate(
      NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
      Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
      agglevel = factor(agglevel, levels = vec_aggregation)
    ) %>%
    filter(Dimension == 'Abiotics'),
  mapping = aes(
    x = NumberOfClusters,
    y = log(TotalSum),
    color = method,
    linetype = atleast
  )
) +
  geom_line(linewidth = 0.75, alpha = 0.75) +
  facet_wrap(~agglevel, nrow = 2) +
  theme_minimal() +
  theme(legend.position = 'bottom')

Code
# log(TotalSum) against the number of clusters for the 'Abiotics' rows of
# df_outputSS: panels are atleast (rows) x method (columns), colour is the
# aggregation level on a discrete viridis scale.
df_outputSS %>%
  mutate(
    NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
    Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
    agglevel = factor(agglevel, levels = vec_aggregation)
  ) %>%
  filter(Dimension == 'Abiotics') %>%
  ggplot(aes(x = NumberOfClusters, y = log(TotalSum), color = agglevel)) +
  geom_line(linewidth = 1, alpha = 0.75) +
  facet_grid(atleast ~ method) +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  viridis::scale_color_viridis(discrete = TRUE)

Code
# Stack the rank-based table and the three MixDist tables, labelling each with
# its AlphaGeo value as a string, then plot log(TotalSum) against the number
# of clusters for the 'Abiotics' rows (PG and SG1 excluded):
# panels are atleast (rows) x aggregation level (columns), colour = AlphaGeo,
# line type = method.
ggplot(
  data = bind_rows(
    mutate(df_outputSS_Rank, Dist = 'Rank', AlphaGeo = '0'),
    mutate(df_outputSS_MixDist_0.1, Dist = 'Rank', AlphaGeo = '0.1'),
    mutate(df_outputSS_MixDist_0.2, Dist = 'Rank', AlphaGeo = '0.2'),
    mutate(df_outputSS_MixDist_0.35, Dist = 'Rank', AlphaGeo = '0.35')
  ) %>%
    mutate(
      NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
      Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
      agglevel = factor(agglevel, levels = vec_aggregation)
    ) %>%
    filter(Dimension == 'Abiotics', agglevel != 'PG', agglevel != 'SG1'),
  mapping = aes(
    x = NumberOfClusters,
    y = log(TotalSum),
    color = AlphaGeo,
    linetype = method
  )
) +
  geom_line(linewidth = 0.75, alpha = 0.75) +
  facet_grid(atleast ~ agglevel) +
  theme_minimal() +
  theme(legend.position = 'bottom')

Code
# Stack the rank-based table and the three MixDist tables, labelling each with
# its AlphaGeo value as a string, then plot log(TotalSum) against the number
# of clusters for the 'Abiotics' rows (PG and SG1 excluded):
# panels are atleast (rows) x AlphaGeo (columns), colour = aggregation level,
# line type = method.
bind_rows(
  df_outputSS_Rank %>% mutate(Dist = 'Rank', AlphaGeo = '0'),
  df_outputSS_MixDist_0.1 %>% mutate(Dist = 'Rank', AlphaGeo = '0.1'),
  df_outputSS_MixDist_0.2 %>% mutate(Dist = 'Rank', AlphaGeo = '0.2'),
  df_outputSS_MixDist_0.35 %>% mutate(Dist = 'Rank', AlphaGeo = '0.35')
) %>%
  mutate(
    NumberOfClusters = as.integer(gsub('Cluster', '', clusters)),
    Dimension = ifelse(dimension == 'df_LatDephtDistance', 'GeoSpatial', 'Abiotics'),
    agglevel = factor(agglevel, levels = vec_aggregation)
  ) %>%
  filter(Dimension == 'Abiotics', agglevel != 'PG', agglevel != 'SG1') %>%
  ggplot(aes(x = NumberOfClusters, y = log(TotalSum), color = agglevel, linetype = method)) +
  geom_line(linewidth = 1, alpha = 0.75) +
  facet_grid(atleast ~ AlphaGeo) +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  viridis::scale_color_viridis(discrete = TRUE)

testing